from docx import Document
from docx.table import Table
from docx.text.paragraph import Paragraph
from prettytable import PrettyTable
# Dump a .docx file to stdout in document order: each paragraph is printed as
# plain text and each table is rendered as an ASCII grid with PrettyTable.
doc = Document('/home/FAST_DATA_MIRROR/Langchain-Chatchat-master/tabel/test2.docx')


# NOTE(review): these two names are not used by the loop below; they are kept
# in case code outside this section relies on them.
text_before_first_table = []
found_first_table = False

# Walk the direct children of the document body so that paragraphs and tables
# come out interleaved in their original order.
for block in doc.element.body:
    tag = block.tag
    if not isinstance(tag, str):
        # Non-element nodes (e.g. XML comments) have a non-string tag.
        continue

    # Tags are namespace-qualified ("{namespace}localname"); matching on
    # "}name" pins the exact local name instead of any tag that merely ends
    # with the same letters.
    if tag.endswith('}p'):
        # Wrap the XML element in a Paragraph object and read the text through
        # the public API; the raw element's `.text` is not guaranteed to be
        # the visible paragraph text in every python-docx version.
        print(Paragraph(block, doc).text)
    elif tag.endswith('}tbl'):
        # Wrap the XML element in a Table object to read its rows and cells.
        table = Table(block, doc)
        # Create a fresh grid per table so a table without rows can neither
        # hit an undefined name nor re-print the previous table's content.
        table_print = PrettyTable()
        for row in table.rows:
            table_print.add_row([cell.text for cell in row.cells])
        # Left-align after the rows are in place: PrettyTable only has columns
        # once a row has been added, and setting the alignment here is meant
        # to work regardless of how a given version handles an empty table.
        table_print.align = "l"
        print(table_print)
    else:
        # Any other body child (typically the trailing section properties) is
        # neither a paragraph nor a table; there is nothing to print for it.
        continue